I have a typical project of predicting the NYC uber/lyft trip demand. The dataset is available from Jan2022 to March 2023. The area is already divided into different locations. and I want the predicted demand for each location every 15 mins
The goal of this project is to predict the demand for Uber/Lyft trips in different locations of NYC every 15 minutes, using a dataset spanning from January 2022 to March 2023. The dataset includes information such as the dispatching base number, pickup datetime, drop-off datetime, pickup location ID, drop-off location ID, SR_Flag, and affiliated base number
import pandas as pd
import glob
import tqdm
import pandas as pd
import plotly.graph_objects as go
from statsmodels.tsa.arima.model import ARIMA
from dateutil.relativedelta import relativedelta
import numpy as np
from pmdarima import auto_arima
data_list_path = glob.glob('Datasets/fhv_tripdata_2022-2023_in_csv/*.csv')
list_df = []
for path in data_list_path:
print(path)
# Step 1: Preprocess the Dataset
df = pd.read_csv(path)
list_df.append(df)
df = pd.concat(list_df)
interested_features = ['pickup_datetime','PUlocationID']
df = df[interested_features]
Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-09.csv Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-02.csv Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-04.csv Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-07.csv Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-01.csv Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-06.csv Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-08.csv Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2023-03.csv Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-11.csv Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-12.csv Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2023-02.csv Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-03.csv Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2023-01.csv Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-05.csv Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-10.csv
import pandas as pd
import pmdarima as pm
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
print('Number of Rows Before Removing NaN:', df.shape[0])
removed_nan_df = df.dropna()
print('Number of Rows After Removing NaN:', removed_nan_df.shape[0])
Number of Rows Before Removing NaN: 17712727 Number of Rows After Removing NaN: 4164902
location_ids = removed_nan_df['PUlocationID'].unique().tolist()
loop_count = 0
for lc_id in location_ids:
print('Location ID:', lc_id)
df_subset = removed_nan_df[removed_nan_df['PUlocationID'] == lc_id]
df_subset['pickup_datetime'] = pd.to_datetime(df_subset['pickup_datetime'])
df_subset = df_subset.sort_values('pickup_datetime')
df_subset = df_subset.set_index('pickup_datetime')
df_subset = df_subset['PUlocationID'].resample('1H').count()
df_subset = df_subset.reset_index()
# Split data into training and testing sets
train_size = int(len(df_subset) * 0.95)
train_data = df_subset[:train_size]
test_data = df_subset[train_size:]
# Perform auto ARIMA on training data
model = pm.auto_arima(train_data['PUlocationID'], seasonal=True, trace=True)
# Generate predictions
forecast, conf_int = model.predict(n_periods=len(test_data), return_conf_int=True)
# Plotting
fig = go.Figure()
fig.add_trace(go.Scatter(x=train_data.index, y=train_data['PUlocationID'], mode='lines+markers', name='Training Data'))
fig.add_trace(go.Scatter(x=test_data.index, y=test_data['PUlocationID'], mode='lines+markers', name='Testing Data'))
fig.add_trace(go.Scatter(x=test_data.index, y=forecast, mode='lines+markers', name='ARIMA Forecast'))
fig.update_layout(title=f'PickLocation ID: {lc_id}', xaxis_title='Time', yaxis_title='Number Drives')
fig.show()
loop_count +=1
if loop_count >5:
break
Location ID: 12.0 Performing stepwise search to minimize aic
/tmp/ipykernel_19459/1656292523.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df_subset['pickup_datetime'] = pd.to_datetime(df_subset['pickup_datetime'])
ARIMA(2,0,2)(0,0,0)[0] intercept : AIC=-11849.933, Time=18.02 sec ARIMA(0,0,0)(0,0,0)[0] intercept : AIC=-11825.381, Time=5.04 sec ARIMA(1,0,0)(0,0,0)[0] intercept : AIC=-11844.742, Time=10.19 sec ARIMA(0,0,1)(0,0,0)[0] intercept : AIC=-11843.762, Time=4.49 sec ARIMA(0,0,0)(0,0,0)[0] : AIC=-11650.357, Time=0.41 sec ARIMA(1,0,2)(0,0,0)[0] intercept : AIC=-11854.674, Time=35.05 sec ARIMA(0,0,2)(0,0,0)[0] intercept : AIC=-11847.957, Time=7.87 sec ARIMA(1,0,1)(0,0,0)[0] intercept : AIC=-11846.164, Time=6.39 sec ARIMA(1,0,3)(0,0,0)[0] intercept : AIC=-11844.097, Time=17.41 sec ARIMA(0,0,3)(0,0,0)[0] intercept : AIC=-11847.073, Time=8.61 sec ARIMA(2,0,1)(0,0,0)[0] intercept : AIC=-11854.373, Time=20.90 sec ARIMA(2,0,3)(0,0,0)[0] intercept : AIC=-11853.703, Time=65.54 sec ARIMA(1,0,2)(0,0,0)[0] : AIC=inf, Time=2.92 sec Best model: ARIMA(1,0,2)(0,0,0)[0] intercept Total fit time: 202.841 seconds
Location ID: 89.0 Performing stepwise search to minimize aic
/tmp/ipykernel_19459/1656292523.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=50398.076, Time=25.17 sec ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=51768.990, Time=0.61 sec ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=50564.942, Time=1.47 sec ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=50454.990, Time=2.80 sec ARIMA(0,1,0)(0,0,0)[0] : AIC=51766.991, Time=0.24 sec ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=inf, Time=49.71 sec ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=50447.109, Time=8.32 sec ARIMA(3,1,2)(0,0,0)[0] intercept : AIC=50260.815, Time=34.84 sec ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=50442.066, Time=18.64 sec ARIMA(4,1,2)(0,0,0)[0] intercept : AIC=50238.059, Time=61.10 sec ARIMA(4,1,1)(0,0,0)[0] intercept : AIC=50429.196, Time=16.75 sec ARIMA(5,1,2)(0,0,0)[0] intercept : AIC=50165.209, Time=68.43 sec ARIMA(5,1,1)(0,0,0)[0] intercept : AIC=50401.097, Time=19.04 sec ARIMA(5,1,3)(0,0,0)[0] intercept : AIC=inf, Time=89.99 sec ARIMA(4,1,3)(0,0,0)[0] intercept : AIC=inf, Time=69.49 sec ARIMA(5,1,2)(0,0,0)[0] : AIC=50163.210, Time=8.91 sec ARIMA(4,1,2)(0,0,0)[0] : AIC=50236.059, Time=5.33 sec ARIMA(5,1,1)(0,0,0)[0] : AIC=50399.097, Time=3.10 sec ARIMA(5,1,3)(0,0,0)[0] : AIC=inf, Time=9.21 sec ARIMA(4,1,1)(0,0,0)[0] : AIC=50427.197, Time=1.84 sec ARIMA(4,1,3)(0,0,0)[0] : AIC=inf, Time=7.37 sec Best model: ARIMA(5,1,2)(0,0,0)[0] Total fit time: 502.395 seconds
Location ID: 87.0 Performing stepwise search to minimize aic
/tmp/ipykernel_19459/1656292523.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=inf, Time=43.83 sec ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=44697.282, Time=0.62 sec ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=43072.517, Time=1.97 sec ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=42507.709, Time=3.38 sec ARIMA(0,1,0)(0,0,0)[0] : AIC=44695.282, Time=0.25 sec ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=42469.574, Time=6.29 sec ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=inf, Time=37.45 sec ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=inf, Time=40.04 sec ARIMA(0,1,2)(0,0,0)[0] intercept : AIC=42479.063, Time=4.20 sec ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=42724.790, Time=3.08 sec ARIMA(1,1,1)(0,0,0)[0] : AIC=42467.574, Time=0.59 sec ARIMA(0,1,1)(0,0,0)[0] : AIC=42505.709, Time=0.30 sec ARIMA(1,1,0)(0,0,0)[0] : AIC=43070.517, Time=0.26 sec ARIMA(2,1,1)(0,0,0)[0] : AIC=inf, Time=1.45 sec ARIMA(1,1,2)(0,0,0)[0] : AIC=inf, Time=2.59 sec ARIMA(0,1,2)(0,0,0)[0] : AIC=42477.063, Time=0.57 sec ARIMA(2,1,0)(0,0,0)[0] : AIC=42722.790, Time=0.41 sec ARIMA(2,1,2)(0,0,0)[0] : AIC=inf, Time=3.92 sec Best model: ARIMA(1,1,1)(0,0,0)[0] Total fit time: 151.225 seconds
Location ID: 230.0 Performing stepwise search to minimize aic
/tmp/ipykernel_19459/1656292523.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=inf, Time=45.03 sec ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=54510.711, Time=0.64 sec ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=53717.314, Time=1.45 sec ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=53577.196, Time=3.32 sec ARIMA(0,1,0)(0,0,0)[0] : AIC=54508.711, Time=0.25 sec ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=inf, Time=20.54 sec ARIMA(0,1,2)(0,0,0)[0] intercept : AIC=53537.512, Time=4.17 sec ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=inf, Time=33.35 sec ARIMA(0,1,3)(0,0,0)[0] intercept : AIC=53441.672, Time=7.54 sec ARIMA(1,1,3)(0,0,0)[0] intercept : AIC=inf, Time=49.47 sec ARIMA(0,1,4)(0,0,0)[0] intercept : AIC=inf, Time=28.08 sec ARIMA(1,1,4)(0,0,0)[0] intercept : AIC=inf, Time=56.27 sec ARIMA(0,1,3)(0,0,0)[0] : AIC=53439.672, Time=0.84 sec ARIMA(0,1,2)(0,0,0)[0] : AIC=53535.512, Time=0.49 sec ARIMA(1,1,3)(0,0,0)[0] : AIC=inf, Time=2.75 sec ARIMA(0,1,4)(0,0,0)[0] : AIC=inf, Time=2.33 sec ARIMA(1,1,2)(0,0,0)[0] : AIC=inf, Time=1.75 sec ARIMA(1,1,4)(0,0,0)[0] : AIC=inf, Time=4.12 sec Best model: ARIMA(0,1,3)(0,0,0)[0] Total fit time: 262.372 seconds
Location ID: 73.0 Performing stepwise search to minimize aic
/tmp/ipykernel_19459/1656292523.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=inf, Time=41.41 sec ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=39805.648, Time=0.63 sec ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=37163.950, Time=1.76 sec ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=35795.024, Time=4.28 sec ARIMA(0,1,0)(0,0,0)[0] : AIC=39803.648, Time=0.25 sec ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=inf, Time=18.92 sec ARIMA(0,1,2)(0,0,0)[0] intercept : AIC=35790.271, Time=6.44 sec ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=35798.722, Time=6.41 sec ARIMA(0,1,3)(0,0,0)[0] intercept : AIC=inf, Time=32.70 sec ARIMA(1,1,3)(0,0,0)[0] intercept : AIC=inf, Time=51.26 sec ARIMA(0,1,2)(0,0,0)[0] : AIC=35788.271, Time=0.84 sec ARIMA(0,1,1)(0,0,0)[0] : AIC=35793.025, Time=0.57 sec ARIMA(1,1,2)(0,0,0)[0] : AIC=35796.722, Time=0.76 sec ARIMA(0,1,3)(0,0,0)[0] : AIC=inf, Time=1.83 sec ARIMA(1,1,1)(0,0,0)[0] : AIC=inf, Time=1.57 sec ARIMA(1,1,3)(0,0,0)[0] : AIC=inf, Time=3.69 sec Best model: ARIMA(0,1,2)(0,0,0)[0] Total fit time: 173.335 seconds
Location ID: 93.0 Performing stepwise search to minimize aic
/tmp/ipykernel_19459/1656292523.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
ARIMA(2,1,2)(0,0,0)[0] intercept : AIC=17506.196, Time=23.59 sec ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=22029.528, Time=3.02 sec ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=20310.752, Time=2.13 sec ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=18319.407, Time=8.53 sec ARIMA(0,1,0)(0,0,0)[0] : AIC=22027.528, Time=0.26 sec ARIMA(1,1,2)(0,0,0)[0] intercept : AIC=17518.186, Time=34.55 sec ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=17514.342, Time=48.60 sec ARIMA(3,1,2)(0,0,0)[0] intercept : AIC=17505.625, Time=35.38 sec ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=17509.453, Time=42.82 sec ARIMA(4,1,2)(0,0,0)[0] intercept : AIC=17507.371, Time=64.82 sec ARIMA(3,1,3)(0,0,0)[0] intercept : AIC=17535.807, Time=47.20 sec ARIMA(2,1,3)(0,0,0)[0] intercept : AIC=17506.317, Time=56.05 sec ARIMA(4,1,1)(0,0,0)[0] intercept : AIC=17509.439, Time=55.24 sec ARIMA(4,1,3)(0,0,0)[0] intercept : AIC=17519.849, Time=63.61 sec ARIMA(3,1,2)(0,0,0)[0] : AIC=17503.622, Time=1.53 sec ARIMA(2,1,2)(0,0,0)[0] : AIC=17502.485, Time=2.20 sec ARIMA(1,1,2)(0,0,0)[0] : AIC=17516.180, Time=1.20 sec ARIMA(2,1,1)(0,0,0)[0] : AIC=17512.341, Time=1.21 sec ARIMA(2,1,3)(0,0,0)[0] : AIC=17503.699, Time=4.91 sec ARIMA(1,1,1)(0,0,0)[0] : AIC=17534.014, Time=0.84 sec ARIMA(1,1,3)(0,0,0)[0] : AIC=17507.498, Time=3.46 sec ARIMA(3,1,1)(0,0,0)[0] : AIC=17507.444, Time=1.83 sec ARIMA(3,1,3)(0,0,0)[0] : AIC=17511.641, Time=6.55 sec Best model: ARIMA(2,1,2)(0,0,0)[0] Total fit time: 509.552 seconds
df_subset
| pickup_datetime | PUlocationID | |
|---|---|---|
| 0 | 2022-01-01 04:00:00 | 1 |
| 1 | 2022-01-01 05:00:00 | 0 |
| 2 | 2022-01-01 06:00:00 | 0 |
| 3 | 2022-01-01 07:00:00 | 0 |
| 4 | 2022-01-01 08:00:00 | 0 |
| ... | ... | ... |
| 10911 | 2023-03-31 19:00:00 | 1 |
| 10912 | 2023-03-31 20:00:00 | 0 |
| 10913 | 2023-03-31 21:00:00 | 0 |
| 10914 | 2023-03-31 22:00:00 | 0 |
| 10915 | 2023-03-31 23:00:00 | 1 |
10916 rows × 2 columns
df[df['PUlocationID']==12.0]
| pickup_datetime | PUlocationID | |
|---|---|---|
| 12 | 2022-09-01 00:22:02 | 12.0 |
| 33097 | 2022-09-01 16:26:58 | 12.0 |
| 105815 | 2022-09-03 16:21:51 | 12.0 |
| 133578 | 2022-09-04 15:17:07 | 12.0 |
| 138562 | 2022-09-04 19:42:16 | 12.0 |
| ... | ... | ... |
| 642865 | 2022-10-18 14:55:31 | 12.0 |
| 826714 | 2022-10-23 01:36:25 | 12.0 |
| 841406 | 2022-10-23 13:25:39 | 12.0 |
| 1020739 | 2022-10-27 16:48:35 | 12.0 |
| 1030442 | 2022-10-27 23:40:38 | 12.0 |
200 rows × 2 columns
df_subset.values
array([[Timestamp('2022-01-01 04:00:00'), 1],
[Timestamp('2022-01-01 05:00:00'), 0],
[Timestamp('2022-01-01 06:00:00'), 0],
...,
[Timestamp('2023-03-31 21:00:00'), 0],
[Timestamp('2023-03-31 22:00:00'), 0],
[Timestamp('2023-03-31 23:00:00'), 1]], dtype=object)
df_subset
| pickup_datetime | PUlocationID | |
|---|---|---|
| 0 | 2022-01-01 04:00:00 | 1 |
| 1 | 2022-01-01 05:00:00 | 0 |
| 2 | 2022-01-01 06:00:00 | 0 |
| 3 | 2022-01-01 07:00:00 | 0 |
| 4 | 2022-01-01 08:00:00 | 0 |
| ... | ... | ... |
| 10911 | 2023-03-31 19:00:00 | 1 |
| 10912 | 2023-03-31 20:00:00 | 0 |
| 10913 | 2023-03-31 21:00:00 | 0 |
| 10914 | 2023-03-31 22:00:00 | 0 |
| 10915 | 2023-03-31 23:00:00 | 1 |
10916 rows × 2 columns
df = df_subset
df['pickup_datetime'] = pd.to_datetime(df['pickup_datetime'])
df = df.set_index('pickup_datetime')
df['pickups_per_hour'] = df['PUlocationID'].resample('3H').count()
df
| PUlocationID | pickups_per_hour | |
|---|---|---|
| pickup_datetime | ||
| 2022-01-01 04:00:00 | 1 | NaN |
| 2022-01-01 05:00:00 | 0 | NaN |
| 2022-01-01 06:00:00 | 0 | 3.0 |
| 2022-01-01 07:00:00 | 0 | NaN |
| 2022-01-01 08:00:00 | 0 | NaN |
| ... | ... | ... |
| 2023-03-31 19:00:00 | 1 | NaN |
| 2023-03-31 20:00:00 | 0 | NaN |
| 2023-03-31 21:00:00 | 0 | 3.0 |
| 2023-03-31 22:00:00 | 0 | NaN |
| 2023-03-31 23:00:00 | 1 | NaN |
10916 rows × 2 columns
df
| PUlocationID | pickups_per_hour | |
|---|---|---|
| pickup_datetime | ||
| 2022-01-01 04:00:00 | 1 | NaN |
| 2022-01-01 05:00:00 | 0 | NaN |
| 2022-01-01 06:00:00 | 0 | 3.0 |
| 2022-01-01 07:00:00 | 0 | NaN |
| 2022-01-01 08:00:00 | 0 | NaN |
| ... | ... | ... |
| 2023-03-31 19:00:00 | 1 | NaN |
| 2023-03-31 20:00:00 | 0 | NaN |
| 2023-03-31 21:00:00 | 0 | 3.0 |
| 2023-03-31 22:00:00 | 0 | NaN |
| 2023-03-31 23:00:00 | 1 | NaN |
10916 rows × 2 columns
import pandas as pd
import matplotlib.pyplot as plt
# Step 1: Preprocess the Dataset
df = pd.read_csv('Datasets/fhv_tripdata_2022-2023_in_csv/fhv_tripdata_2022-01.csv')
df['pickup_datetime'] = pd.to_datetime(df['pickup_datetime'])
df['dropOff_datetime'] = pd.to_datetime(df['dropOff_datetime'])
df.set_index('pickup_datetime', inplace=True)
# Step 2: Resample the Dataset
demand_15_mints = df[['PUlocationID', 'DOlocationID']].resample('15T').size()
demand_30_mints = df[['PUlocationID', 'DOlocationID']].resample('30T').size()
demand_60_mints = df[['PUlocationID', 'DOlocationID']].resample('1h').size()
# Step 3: Predict the Demand (using your preferred model)
# Step 4: Visualize the Demand
demand_15_mints.plot(figsize=(12, 6))
plt.xlabel('Time')
plt.ylabel('Demand')
plt.title('NYC Uber/Lyft Trip Demand')
plt.show()
# Step 4: Visualize the Demand
demand_30_mints.plot(figsize=(12, 6))
plt.xlabel('Time')
plt.ylabel('Demand')
plt.title('NYC Uber/Lyft Trip Demand')
plt.show()
# Step 4: Visualize the Demand
demand_60_mints.plot(figsize=(12, 6))
plt.xlabel('Time')
plt.ylabel('Demand')
plt.title('NYC Uber/Lyft Trip Demand')
plt.show()
demand_30_mints
pickup_datetime
2022-01-01 00:00:00 418
2022-01-01 00:30:00 488
2022-01-01 01:00:00 435
2022-01-01 01:30:00 417
2022-01-01 02:00:00 341
...
2022-01-31 21:30:00 518
2022-01-31 22:00:00 507
2022-01-31 22:30:00 456
2022-01-31 23:00:00 439
2022-01-31 23:30:00 336
Freq: 30T, Length: 1488, dtype: int64